New York City Trees

In [1]:
import os

import bokeh, bokeh.plotting, bokeh.models
import geopandas as gp
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shapely
from bokeh.io import output_notebook, show
from shapely.geometry import Point
## Send Bokeh plots to the notebook output cells (this prints the "Loading BokehJS ..." banner).
output_notebook()
## Render matplotlib figures inline in the notebook.
%matplotlib inline
Loading BokehJS ...

Read the Tree Data CSV file into a pandas DataFrame

In [2]:
## Load the 2015 street tree census. low_memory=False is passed through to
## pandas unchanged (see the read_csv documentation for its effect on dtype inference).
Trees = pd.read_csv(
    "2015_Street_Tree_Census_-_Tree_Data.csv",
    low_memory=False,
)
In [3]:
## Summarise the tree census frame: column names, non-null counts, dtypes and memory usage.
Trees.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 683788 entries, 0 to 683787
Data columns (total 45 columns):
tree_id             683788 non-null int64
block_id            683788 non-null int64
created_at          683788 non-null object
tree_dbh            683788 non-null int64
stump_diam          683788 non-null int64
curb_loc            683788 non-null object
status              683788 non-null object
health              652172 non-null object
spc_latin           652169 non-null object
spc_common          652169 non-null object
steward             652173 non-null object
guards              652172 non-null object
sidewalk            652172 non-null object
user_type           683788 non-null object
problems            652124 non-null object
root_stone          683788 non-null object
root_grate          683788 non-null object
root_other          683788 non-null object
trunk_wire          683788 non-null object
trnk_light          683788 non-null object
trnk_other          683788 non-null object
brch_light          683788 non-null object
brch_shoe           683788 non-null object
brch_other          683788 non-null object
address             683788 non-null object
postcode            683788 non-null int64
zip_city            683788 non-null object
community board     683788 non-null int64
borocode            683788 non-null int64
borough             683788 non-null object
cncldist            683788 non-null int64
st_assem            683788 non-null int64
st_senate           683788 non-null int64
nta                 683788 non-null object
nta_name            683788 non-null object
boro_ct             683788 non-null int64
state               683788 non-null object
latitude            683788 non-null float64
longitude           683788 non-null float64
x_sp                683788 non-null float64
y_sp                683788 non-null float64
council district    677269 non-null float64
census tract        677269 non-null float64
bin                 674229 non-null float64
bbl                 674229 non-null float64
dtypes: float64(8), int64(11), object(26)
memory usage: 234.8+ MB

Read the shapefile into a GeoDataFrame

In [4]:
## Directory holding the Neighborhood Tabulation Areas shapefile.
## The location used to be hard-coded to one user's machine; it can now be
## overridden with the NTA_SHAPEFILE_DIR environment variable so the notebook
## runs elsewhere. The default is the original path, so existing behavior is unchanged.
NTA_SHAPEFILE_DIR = os.environ.get(
    "NTA_SHAPEFILE_DIR",
    "C:/Users/julie/Documents/Kaggle/Trees/Neighborhood Tabulation Areas/",
)

## Read the neighborhood polygons into a GeoDataFrame.
hoods = gp.GeoDataFrame.from_file(NTA_SHAPEFILE_DIR)
In [5]:
## Summarise the neighborhood frame: column names, non-null counts, dtypes and memory usage.
hoods.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 195 entries, 0 to 194
Data columns (total 8 columns):
county_fip    195 non-null object
shape_area    195 non-null float64
ntacode       195 non-null object
ntaname       195 non-null object
shape_leng    195 non-null float64
boro_name     195 non-null object
boro_code     195 non-null float64
geometry      195 non-null object
dtypes: float64(3), object(5)
memory usage: 12.3+ KB

Convert the ntaname into an integer code that can be mapped by Bokeh

In [6]:
## Encode each neighborhood name as an integer category code
## so it can be fed to a numeric color mapper.
hoods["nta_num"] = hoods["ntaname"].astype("category").cat.codes

Create the Bokeh interactive plot for neighborhoods

In [7]:
## Convert the GeoDataFrame into a Bokeh data source (serialised as GeoJSON)
gjds = bokeh.models.GeoJSONDataSource(geojson=hoods.to_json())

## Tools to enable on the plot toolbar
TOOLS = "pan,wheel_zoom,reset,hover,save"

## Create the plot; both axes are hidden
p = bokeh.plotting.figure(
    title="NYC Neighborhoods",
    tools=TOOLS,
    x_axis_location=None,
    y_axis_location=None,
    sizing_mode='fixed',
)

## Choose the color scheme to map.
## NOTE(review): nta_num holds one code per neighborhood name, while the palette
## has only 20 colors, so a linear mapper will give several neighborhoods the
## same color. Confirm this is acceptable for the figure.
color_mapper = bokeh.models.LinearColorMapper(palette=bokeh.palettes.Category20[20])

## Draw one patch per neighborhood, colored by its integer code
p.patches(
    'xs', 'ys',
    fill_color={'field': 'nta_num', 'transform': color_mapper},
    fill_alpha=1.,
    line_color="black",
    line_width=0.5,
    source=gjds,
)

## Remove the grid
p.grid.grid_line_color = None

## Configure the interactive hover tool.
## The degree sign is written as the HTML entity &deg; (the tooltip is an HTML
## template) so it cannot be corrupted by a file-encoding mismatch; the previous
## literal character had been mangled into mojibake.
hover = p.select_one(bokeh.models.HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = u"""
<div> 
    <div class="bokeh_hover_tooltip">Name : @ntaname</div>
    <div class="bokeh_hover_tooltip">Borough : @boro_name</div>
    <div class="bokeh_hover_tooltip">Zone ID : @county_fip</div>
    <div class="bokeh_hover_tooltip">(Lon, Lat) : ($x &deg;E, $y &deg;N)</div>
</div>
"""

show(p)
In [8]:
## Turn the tree table into a GeoDataFrame of points.
## `Point` and `gp` come from the import cell at the top of the notebook, so
## this cell no longer carries its own imports.

## One shapely Point per tree; x is the longitude and y is the latitude.
geometry = [Point(lon, lat) for lon, lat in zip(Trees.longitude, Trees.latitude)]

## The raw coordinate columns are dropped once they are captured in `geometry`.
## NOTE: this rebinds `Trees`, so re-running the cell without reloading the CSV
## fails because the longitude/latitude columns no longer exist.
Trees = Trees.drop(['longitude', 'latitude'], axis=1)

## Tag the points with the EPSG:4326 coordinate reference system.
crs = {'init': 'epsg:4326'}
Trees = gp.GeoDataFrame(Trees, crs=crs, geometry=geometry)
In [9]:
## Spatially join every tree to the neighborhood polygon it intersects;
## how="inner" keeps only trees that match a neighborhood.
## "intersects" is sjoin's default spatial test and is therefore left implicit:
## the keyword that selects it was renamed from `op` to `predicate` in newer
## geopandas releases, and omitting it keeps this cell working on either side
## of that change with identical results.
Trees = gp.sjoin(Trees, hoods, how="inner")
In [10]:
## Number of joined trees per neighborhood name, looked up for each row of hoods.
## Neighborhoods with no matching trees get NaN.
trees_per_hood = Trees.groupby("ntaname").size()
hoods["trees"] = hoods["ntaname"].map(trees_per_hood)
In [11]:
## Express each neighborhood's area as a number of "blocks", where one block is
## 100000 shape_area units (the unit of shape_area is not shown here;
## presumably square feet, TODO confirm against the shapefile metadata).
AREA_PER_BLOCK = 100000
hoods["blocks"] = hoods["shape_area"].div(AREA_PER_BLOCK)
In [12]:
## Tree density: trees per block for every neighborhood.
hoods["tree_block"] = hoods["trees"].div(hoods["blocks"])
In [13]:
## Re-serialise hoods so the data source includes the newly added tree_block column
gjds = bokeh.models.GeoJSONDataSource(geojson=hoods.to_json())

## Tools to enable on the plot toolbar
TOOLS = "pan,wheel_zoom,reset,hover,save"

## Create the plot; both axes are hidden
p = bokeh.plotting.figure(
    title="NYC Neighborhoods",
    tools=TOOLS,
    x_axis_location=None,
    y_axis_location=None,
    sizing_mode='fixed',
)

## Five-step green palette, used in reverse of Bokeh's listed order
## (presumably so the lowest densities get the lightest shade; confirm against bokeh.palettes)
color_mapper = bokeh.models.LinearColorMapper(palette=bokeh.palettes.Greens[5][::-1])

## Draw one patch per neighborhood, colored by trees per block
p.patches(
    'xs', 'ys',
    fill_color={'field': 'tree_block', 'transform': color_mapper},
    fill_alpha=1.,
    line_color="black",
    line_width=0.5,
    source=gjds,
)

## Remove the grid and give the canvas a faint beige background
p.grid.grid_line_color = None
p.background_fill_color = "beige"
p.background_fill_alpha = .05

## Configure the interactive hover tool.
## The degree sign is written as the HTML entity &deg; (the tooltip is an HTML
## template) so it cannot be corrupted by a file-encoding mismatch; the previous
## literal character had been mangled into mojibake.
hover = p.select_one(bokeh.models.HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = u"""
<div> 
    <div class="bokeh_hover_tooltip">Name : @ntaname</div>
    <div class="bokeh_hover_tooltip">Borough : @boro_name</div>
    <div class="bokeh_hover_tooltip">Zone ID : @county_fip</div>
    <div class="bokeh_hover_tooltip">Trees per Block : @tree_block</div>
    <div class="bokeh_hover_tooltip">(Lon, Lat) : ($x &deg;E, $y &deg;N)</div>
</div>
"""

show(p)
In [14]:
## Count trees per neighborhood, broken down by status (Alive / Dead / Stump).
status_counts = Trees.groupby(["ntaname", "status"]).size()
status_counts
Out[14]:
ntaname                                     status
Airport                                     Alive       155
                                            Dead          6
                                            Stump         7
Allerton-Pelham Gardens                     Alive      3609
                                            Dead         99
                                            Stump       144
Annadale-Huguenot-Prince's Bay-Eltingville  Alive     12538
                                            Dead        197
                                            Stump       242
Arden Heights                               Alive      6729
                                            Dead         97
                                            Stump       168
Astoria                                     Alive      4183
                                            Dead         72
                                            Stump       133
Auburndale                                  Alive      5039
                                            Dead         54
                                            Stump       139
Baisley Park                                Alive      4545
                                            Dead         84
                                            Stump       179
Bath Beach                                  Alive      1671
                                            Dead         25
                                            Stump        49
Battery Park City-Lower Manhattan           Alive      1266
                                            Dead         30
                                            Stump        16
Bay Ridge                                   Alive      6606
                                            Dead        105
                                            Stump       191
                                                      ...  
Williamsburg                                Alive      1595
                                            Dead         23
                                            Stump        25
Windsor Terrace                             Alive      2030
                                            Dead         13
                                            Stump        40
Woodhaven                                   Alive      3957
                                            Dead         80
                                            Stump       115
Woodlawn-Wakefield                          Alive      3802
                                            Dead         57
                                            Stump        87
Woodside                                    Alive      2610
                                            Dead         34
                                            Stump        76
Yorkville                                   Alive      2133
                                            Dead         47
                                            Stump        89
park-cemetery-etc-Bronx                     Alive      1402
                                            Dead         42
                                            Stump        71
park-cemetery-etc-Brooklyn                  Alive      1905
                                            Dead         20
                                            Stump        55
park-cemetery-etc-Manhattan                 Alive      1233
                                            Dead          9
                                            Stump         9
park-cemetery-etc-Queens                    Alive      2050
                                            Dead         36
                                            Stump       176
Length: 579, dtype: int64
In [15]:
## Number of trees whose status is "Alive" per neighborhood name,
## looked up for each row of hoods (NaN where a neighborhood has none).
alive_trees = Trees.loc[Trees["status"] == "Alive"]
alive_per_hood = alive_trees.groupby("ntaname").size()
hoods["green_trees"] = hoods["ntaname"].map(alive_per_hood)
In [16]:
## Live-tree density: alive trees per block for every neighborhood.
hoods["green_block"] = hoods["green_trees"].div(hoods["blocks"])
In [26]:
## Re-serialise hoods so the data source includes the newly added green_block column
gjds = bokeh.models.GeoJSONDataSource(geojson=hoods.to_json())

## Tools to enable on the plot toolbar
TOOLS = "pan,wheel_zoom,reset,hover,save"

## Create the plot; both axes are hidden
p = bokeh.plotting.figure(
    title="NYC Neighborhoods",
    tools=TOOLS,
    x_axis_location=None,
    y_axis_location=None,
    sizing_mode='fixed',
)

## Nine-step red/yellow/green diverging palette, used in reverse of Bokeh's listed order
## (presumably so low densities are red and high densities green; confirm against bokeh.palettes)
color_mapper = bokeh.models.LinearColorMapper(palette=bokeh.palettes.RdYlGn[9][::-1])

## Draw one patch per neighborhood, colored by alive trees per block
p.patches(
    'xs', 'ys',
    fill_color={'field': 'green_block', 'transform': color_mapper},
    fill_alpha=1.,
    line_color="black",
    line_width=0.5,
    source=gjds,
)

## Remove the grid and give the canvas a faint beige background
p.grid.grid_line_color = None
p.background_fill_color = "beige"
p.background_fill_alpha = .05

## Configure the interactive hover tool.
## - The density label now says "Live Trees per Block": green_block counts only
##   trees with status "Alive", so the old "Trees per Block" label was misleading.
## - The degree sign is written as the HTML entity &deg; (the tooltip is an HTML
##   template) so it cannot be corrupted by a file-encoding mismatch; the previous
##   literal character had been mangled into mojibake.
hover = p.select_one(bokeh.models.HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = u"""
<div> 
    <div class="bokeh_hover_tooltip">Name : @ntaname</div>
    <div class="bokeh_hover_tooltip">Borough : @boro_name</div>
    <div class="bokeh_hover_tooltip">Zone ID : @county_fip</div>
    <div class="bokeh_hover_tooltip">Live Trees per Block : @green_block</div>
    <div class="bokeh_hover_tooltip">(Lon, Lat) : ($x &deg;E, $y &deg;N)</div>
</div>
"""

show(p)